# Regressions
#
# Date last modified:   2025-10-06
# Author:               Christian Vedel 
# Purpose:              This script produces the results in the in the order they
#                       are presented

# ==== Libraries ====
library(tidyverse)
library(kableExtra)
library(fixest)
library(foreach)
library(gginnards)
library(fastDummies)
source("000_functions.R")

# Small package developed in Henriques, Sharp, Tsoukli, Vedel (2024)
# https://doi.org/10.1016/j.eneco.2024.107887
# Used in coverting MB ratio to modern day real values
source("Converting MB ratios to real value/changeMB_ratio_to_real_values.R")

# ==== Load data ====
reg_data = read_csv("Data/Regression_data.csv")
mission_houses_construction = read_csv("Data/Mission_house_construction.csv")
mission_houses_panel = read_csv("Data/Mission_houses_panel.csv")
grid_panel = read_csv("Data/grid_panel.csv")
sab_summary_data = read_csv("Data/Sab_summary_data.csv")
pre_data = read_csv("Data/Pre_data.csv")
parish_level = read_csv("Data/Parish_data.csv")

# ==== Params ====
dims = c(4, 3) # Used in some plots

# ==== Figure 1 ====
# Created in Inkscape. See 'Figures/Figure1.svg'

# ==== Figure 2 ====
larsen2005 =
  data.frame(
    Year = paste(
      sep = "",
      seq(1870, 1960, by = 10), "-", seq(1879, 1969, by = 10)
    ),
    Constructed = c(9, 103, 323, 221, 91, 102, 30, 15, 16, 8) # Transcribed directly from source
  )

larsen2005 = larsen2005 %>%
  mutate(cum_constr = cumsum(Constructed)/945) %>% 
  mutate(source = "Larsen (2005)")

larsen2005 = larsen2005 %>% bind_rows(mission_houses_construction)

coef = max(larsen2005$Constructed)

p = larsen2005 %>%
  ggplot(aes(Year, Constructed, group = source, col = source)) +
  geom_point() +
  geom_line() +
  geom_point(aes(y = cum_constr*coef), shape = 4) +
  geom_line(aes(y = cum_constr*coef), lty = 2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90)
  ) +
  labs(
    y = "Number of Mission houses constructed"
  ) +
  scale_y_continuous(
    
    # Features of the first axis
    name = "Houses constructed\n(Solid line)",
    
    # Add a second axis and specify its features
    sec.axis = sec_axis(
      ~./coef,
      name="Share of total number\n(Dashed line)",
      labels = scales::percent(seq(from = 0, to = 1, length.out = 5))
    )
  ) + 
  theme(legend.position = "bottom") + 
  labs(
    col = "Source: "
  ) +
  scale_colour_manual(
    values = c(
      "Larsen (2005)" = "#0072B2",  # blue
      "Trap 3/4"      = "#D55E00"   # orange
    )
  )

p
ggsave("Figures/Figure2.png", plot = p, width = 8, height = 6)


# ==== Table 1 ====
options(scipen = 9999) # Get rid of scientific notation in numbers
signif0 = function(x) { # Function to do pretty rounding
  ifelse(x%%1==0, round(x), round(x, 2))
}

to_sum = reg_data %>%
  select(
    IM,
    MB_ratio,
    Butter,
    Milk,
    IM_Moe,
    IM_1890,
    IM_t,
    Sabbatarian,
    Shareholders,
    Dividends
  )

# Build table
rows = list()
rows[[1]] = c("", "Units", "N", "Mean", "SD", "Min", "Q25", "Median", "Q75", "Max")

for(i in 1:length(names(to_sum))){
  var_name = names(to_sum)[i]
  var = to_sum[,i] %>% pull()
  row_var = c(
    n = length(var),
    mean = mean(var, na.rm = TRUE),
    sd = sd(var, na.rm = TRUE),
    min = min(var, na.rm = TRUE),
    q25 = quantile(var, 0.25, na.rm = TRUE),
    median = median(var, na.rm = TRUE),
    q75 = quantile(var, 0.75, na.rm = TRUE),
    max = max(var, na.rm = TRUE)
  ) %>% round(2) %>% format(nsmall = 2)
  row_var = c(var_name = var_name, unit = "", row_var)
  rows[[i+1]] = row_var
}

print_it = function(){
  r1 = rows[[1]]
  rows = rows[-1]
  cat("\n\\begin{tabular}{lrrrrrrrrrr}\n\\toprule\n\\midrule")
  print_vector_as_table_row(r1)
  cat("\n\\midrule")
  for(i in rows){
    print_vector_as_table_row(i)
  } 
  cat("\n\\midrule")
  cat("\n\\bottomrule")
  cat("\n\\end{tabular}")
}

sink("Tables/Table1.tex")
print_it()
sink()

# ==== Figure 3 ====
houses1920_tmp = mission_houses_panel %>% 
  filter(Year == 1920) %>% 
  filter(Missionshuse >= 1) %>% 
  rowwise() %>% 
  mutate(
    X_km = degrees_to_km(
      F,
      lat = lat,
      long = long - 10.22868, # Reference long
      return_both = F
    ),
    Y_km = degrees_to_km(
      T,
      lat = lat,
      long = long - 10.22868, # Reference long
      return_both = F
    )
  ) %>% 
  filter(
    !Amt %in% c("Toender", "Aabenraa", "Haderslev")
  )

houses1920_tmp = houses1920_tmp %>% 
  mutate(Mislocated = Y_km < 6150 & X_km< -25) %>% 
  filter(!Mislocated)

p1 = grid_panel %>%
  filter(Year == 1920) %>%
  ggplot(aes(X_km, Y_km, fill = MP_Missionshuse)) +
  geom_tile(width = 1,
            height = 1) +
  coord_fixed() +
  # scale_fill_distiller(palette = "YlOrRd", high = "Rd") +
  geom_point(
    shape = 4,
    size = 0.5,
    inherit.aes = FALSE,
    aes(X_km, Y_km, col = "Location of\nMission houses"), 
    data = houses1920_tmp
  ) +
  scale_color_manual(values = "black") +
  theme_bw() +
  labs(fill = "Potential influence\nof Inner Mission",
       col = "") + 
  theme(legend.position = "bottom") +
  scale_fill_gradient(
    low = "#ffb700",
    high = "#9e0303"
  ) + 
  theme_void()

p1
ggsave("Figures/Figure3.png", plot = p1, height = 5, width = 8)

# ==== Figure 4 ====
coef = max(sab_summary_data$Sabbatarian)/(max(sab_summary_data$Total))

p = sab_summary_data %>%
  ggplot(aes(as_fctnum(Year), Sabbatarian)) +
  geom_point(aes(col = "Sunday Closed")) +
  geom_line(aes(col = "Sunday Closed")) +
  geom_point(aes(y = Total*coef, col = "All"), shape = 4) +
  geom_line(aes(y = Total*coef, col = "All"), lty = 2) +
  theme_bw() +
  labs(
    y = "Number of Creameries closed Sunday\n(Solid line)",
    x = "Year"
  ) +
  scale_y_continuous(

    # Features of the first axis
    name = "Number of Creameries closed Sunday\n(Solid line)",

    # Add a second axis and specify its features
    sec.axis = sec_axis(
      ~./coef,
      name="Total number of Creameries\n(Dashed line)"
    )
  ) +
  theme(legend.position = "bottom") +
  labs(
    col = ""
  ) +
  scale_color_manual(values = c("Sunday Closed" = "#0072B2", "All" = "#D55E00"))

p
ggsave("Figures/Figure4.png", plot = p, width = 8, height = 6)

# ==== Table 2 ====
mod1 = feols(
  MB_ratio ~ IM | Creamery_ID + Year,
  data = reg_data
)

mod2 = feols(
  MB_ratio ~ IM | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data
)

mod3 = feols(
  MB_ratio ~ 1 | Creamery_ID + Year + soil_type_Year + NUTS2_year | IM ~ IM_1890*IM_t,
  data = reg_data
)

mod3$iv_first_stage

print_it = function(){
  cat("\ ==== Table with clustered standard errors: ====\n")
  etable(
    mod1, mod2, mod3,
    fitstat = c("n","r2", "wr2","kpr"),
    tex = TRUE
  ) %>% print()
  
  cat("\n === Table with conley standard errors: ====\n")
  etable(
    mod1, mod2, mod3,
    fitstat = c("n","r2", "wr2","kpr"),
    tex = TRUE,
    vcov = conley(cutoff = 50)
  ) %>% print()
  
  cat("\n ==== Conversion to 2010 USD equiv.: ==== \n")
  col1 = MB_to_USD(mod1$coeftable[1,1], data0 = reg_data) %>% 
    select(Delta_real_USD, Delta_nom_DKK_of_dividends) %>% 
    pivot_longer(cols = Delta_real_USD:Delta_nom_DKK_of_dividends, values_to = "col1")
  col2 = MB_to_USD(mod2$coeftable[1,1], data0 = reg_data) %>% 
    select(Delta_real_USD, Delta_nom_DKK_of_dividends) %>% 
    pivot_longer(cols = Delta_real_USD:Delta_nom_DKK_of_dividends, values_to = "col2")
  col3 = MB_to_USD(mod3$coeftable[1,1], data0 = reg_data) %>% 
    select(Delta_real_USD, Delta_nom_DKK_of_dividends) %>% 
    pivot_longer(cols = Delta_real_USD:Delta_nom_DKK_of_dividends, values_to = "col3")
  
  col1 %>% left_join(col2, by = "name") %>% left_join(col3, by = "name")  %>% print()
  
}

sink("Tables/Table2.tex")
print_it()
sink()

# ==== Table 3 ====

mod0 = feols(
  IM ~ log(Pop),
  data = pre_data %>% filter(Year == 1890)
)

mod1 = feols(
  IM ~ log(Pop) | GIS_ID + Year + soil_type + NUTS2,
  data = pre_data %>% filter(Year < 1898)
)


sink("Tables/Table3.tex")
etable(mod0, mod1, tex = TRUE) %>% print()
sink()

# ==== Figure 5 ====
df_mod2 = data.frame(
  y = feols( # Residualized y
    MB_ratio ~ 1 | Creamery_ID + Year + soil_type_Year + NUTS2_year,
    data = reg_data
  ) %>% residuals(),
  
  x = feols( # Residualized x
    IM ~ 1 | Creamery_ID + Year + soil_type_Year + NUTS2_year,
    data = reg_data
  ) %>% residuals()
  
)

p1 = binscatter(df_mod2, segment = FALSE) +
  theme_minimal() + 
  labs(
    y = "MB ratio residualized by FE",
    x = "IM residualized by FE"
  )

p1
ggsave("Figures/Figure5.png", width = dims[1], height = dims[2], plot = p1)


# ==== Figure 6 - setup ====
mod1_ref = feols(
  MB_ratio ~ IM | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data
)

# ==== Figure 6a ====
# 6 (a): Region interaction
res = foreach(r = sort(unique(reg_data$NUTS2)), .combine = "bind_rows") %do% {
  run_interaction(r, "NUTS2", binder = "_")
} 

res = res %>% 
  mutate(
    var = rownames(.)
  ) %>%
  filter(grepl("IM:NUTS2_", var)) %>%
  mutate(
    region = gsub("IM:NUTS2_", "", var),
    region = case_when(
      region == "Hovedstaden" ~ "Capital Region",
      region == "Syddanmark" ~ "Southern Denmark",
      region == "Sjaelland" ~ "Region Zealand",
      region == "Midtjylland" ~ "Central Jutland",
      region == "Nordjylland" ~ "Northern Jutland"
    ),
    region = factor(
      region, 
      levels = c(
        "Capital Region",
        "Region Zealand",
        "Southern Denmark",
        "Central Jutland",
        "Northern Jutland"
      )
    ),
    Lower = composite_effect - 1.96*composite_se,
    Upper = composite_effect + 1.96*composite_se
  )

p1 = res %>%
  ggplot(aes(region, composite_effect)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper)) +
  theme_bw() +
  labs(
    x = "Region (interaction with IM)",
    y = "Combined effect:\nIM (std) + Region x IM (std)"
  ) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = coef(mod1_ref), lty = 2) +
  theme(
    axis.text.x = element_text(angle = 45)
  )

p1
ggsave("Figures/Figure6a.png", width = dims[1], height = dims[2])


# ==== Figure 6b ====
# 6 (b): Distance to Copenhagen
reg_data$distance_copenhagen_percentiles = reg_data$distance_copenhagen %>% 
  cut(breaks = quantile(., seq(0,1, by = 0.1), na.rm = TRUE), include.lowest = TRUE) %>% 
  as.numeric() %>% 
  paste0("Q",.)

reg_data = reg_data %>% dummy_cols("distance_copenhagen_percentiles")

res = foreach(r = sort(unique(reg_data$distance_copenhagen_percentiles)), .combine = "bind_rows") %do% {
  run_interaction(r, "distance_copenhagen", binder = "_percentiles_")
}

res = res %>% 
  mutate(
    var = rownames(.)
  ) %>% 
  filter(
    grepl("IM:distance_copenhagen_percentiles", var)
  ) %>% 
  mutate(
    q = gsub("IM:distance_copenhagen_percentiles_Q", "", var) %>% as.numeric()
  ) %>% 
  mutate(
    Lower = Estimate - 1.96*Std..Error,
    Upper = Estimate + 1.96*Std..Error
  )

p1 = res %>% 
  mutate(
    var = rownames(.)
  ) %>% 
  filter(
    grepl("IM:distance_copenhagen_percentiles", var)
  ) %>% 
  mutate(
    q = gsub("IM:distance_copenhagen_percentiles_Q", "", var) %>% as.numeric()
  ) %>% 
  mutate(
    Lower = composite_effect - 1.96*composite_se,
    Upper = composite_effect + 1.96*composite_se
  ) %>% 
  ggplot(aes(q, composite_effect)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper)) +
  theme_bw() +
  scale_x_continuous(breaks = 1:10) + 
  labs(
    x = "Decile (interaction with IM)",
    y = "Combined effect:\nIM (std) + Decile x IM (std)"
  ) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = coef(mod1_ref), lty = 2)

p1
ggsave("Figures/Figure6b.png", width = dims[1], height = dims[2])

# ==== Figure 6c ====
# 6 (c): Cows
# Size - cows
reg_data$Cows_now_percentiles = reg_data$Cows_now %>% 
  cut(breaks = quantile(., seq(0,1, by = 0.1), na.rm = TRUE), include.lowest = TRUE) %>% 
  as.numeric() %>% 
  paste0("Q",.)

reg_data = reg_data %>% dummy_cols("Cows_now_percentiles")

res = foreach(r = sort(unique(reg_data$Cows_now_percentiles)), .combine = "bind_rows") %do% {
  run_interaction(r, "Cows_now", binder = "_percentiles_")
}

res = res %>% 
  mutate(
    var = rownames(.)
  ) %>% 
  filter(
    grepl("IM:Cows_now_percentiles", var)
  ) %>% 
  mutate(
    q = gsub("IM:Cows_now_percentiles_Q", "", var) %>% as.numeric()
  ) %>% 
  mutate(
    Lower = composite_effect - 1.96*composite_se,
    Upper = composite_effect + 1.96*composite_se
  )

p1 = res %>% 
  ggplot(aes(q, composite_effect)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper)) +
  theme_bw() +
  scale_x_continuous(breaks = 1:10) + 
  labs(
    x = "Decile (interaction with IM)",
    y = "Combined effect:\nIM (std) + Decile x IM (std)"
  ) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = coef(mod1_ref), lty = 2)

p1
ggsave("Figures/Figure6c.png", width = dims[1], height = dims[2])

# ==== Figure 6d ====
# 6 (d): Shareholders
# Size - shareholders
reg_data$Shareholders_percentiles = reg_data$Shareholders %>% 
  cut(breaks = quantile(., seq(0,1, by = 0.1), na.rm = TRUE), include.lowest = TRUE) %>% 
  as.numeric() %>% 
  paste0("Q",.)

reg_data = reg_data %>% dummy_cols("Shareholders_percentiles")

res = foreach(r = sort(unique(reg_data$Shareholders_percentiles)), .combine = "bind_rows") %do% {
  run_interaction(r, "Shareholders", binder = "_percentiles_")
}

res = res %>% 
  mutate(
    var = rownames(.)
  ) %>% 
  filter(
    grepl("IM:Shareholders_percentiles", var)
  ) %>% 
  mutate(
    q = gsub("IM:Shareholders_percentiles_Q", "", var) %>% as.numeric()
  ) %>% 
  mutate(
    Lower = composite_effect - 1.96*composite_se,
    Upper = composite_effect + 1.96*composite_se
  )

p1 = res %>% 
  ggplot(aes(q, composite_effect)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper)) +
  theme_bw() +
  scale_x_continuous(breaks = 1:10) + 
  labs(
    x = "Decile (interaction with IM)",
    y = "Combined effect:\nIM (std) + Decile x IM (std)"
  ) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = coef(mod1_ref), lty = 2)

p1
ggsave("Figures/Figure6d.png", width = dims[1], height = dims[2])

# ==== Figure 6e ====
# 6 (e): Population
# Local population
reg_data = reg_data %>% 
  mutate(Popdense = Pop1901/area_parish)

reg_data$Popdense_percentiles = reg_data$Popdense %>% 
  cut(breaks = quantile(., seq(0,1, by = 0.1), na.rm = TRUE), include.lowest = TRUE) %>% 
  as.numeric() %>% 
  paste0("Q",.)

reg_data = reg_data %>% dummy_cols("Popdense_percentiles")

res = foreach(r = sort(unique(reg_data$Popdense_percentiles)), .combine = "bind_rows") %do% {
  run_interaction(r, "Popdense", binder = "_percentiles_")
}

res = res %>% 
  mutate(
    var = rownames(.)
  ) %>% 
  filter(
    grepl("IM:Popdense_percentiles", var)
  ) %>% 
  mutate(
    q = gsub("IM:Popdense_percentiles_Q", "", var) %>% as.numeric()
  ) %>% 
  mutate(
    Lower = composite_effect - 1.96*composite_se,
    Upper = composite_effect + 1.96*composite_se
  )

p1 = res %>% 
  ggplot(aes(q, composite_effect)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper)) +
  theme_bw() +
  scale_x_continuous(breaks = 1:10) + 
  labs(
    x = "Decile (interaction with IM)",
    y = "Combined effect:\nIM (std) + Decile x IM (std)"
  ) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = coef(mod1_ref), lty = 2)

p1
ggsave("Figures/Figure6e.png", width = dims[1], height = dims[2])

# ==== Figure 6f ====
# 6 (f): Boulder clay

# Local soiltype ML share
reg_data$ML_pct_percentiles = reg_data$ML_pct %>% 
  cut(breaks = quantile(., seq(0,1, by = 1/8), na.rm = TRUE), include.lowest = TRUE) %>% 
  as.numeric() %>% 
  paste0("Q",.)

reg_data = reg_data %>% dummy_cols("ML_pct_percentiles")

res = foreach(r = sort(unique(reg_data$ML_pct_percentiles)), .combine = "bind_rows") %do% {
  run_interaction(r, "ML_pct", binder = "_percentiles_")
}

res = res %>% 
  mutate(
    var = rownames(.)
  ) %>% 
  filter(
    grepl("IM:ML_pct_percentiles", var)
  ) %>% 
  mutate(
    q = gsub("IM:ML_pct_percentiles_Q", "", var) %>% as.numeric()
  ) %>% 
  mutate(
    Lower = composite_effect - 1.96*composite_se,
    Upper = composite_effect + 1.96*composite_se
  )

p1 = res %>% 
  ggplot(aes(q, composite_effect)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = Lower, ymax = Upper)) +
  theme_bw() +
  scale_x_continuous(breaks = 1:8) + 
  labs(
    x = "Octile (interaction with IM)",
    y = "Combined effect:\nIM (std) + Octile x IM (std)"
  ) + 
  geom_hline(yintercept = 0) + 
  geom_hline(yintercept = coef(mod1_ref), lty = 2)

p1
ggsave("Figures/Figure6f.png", width = dims[1], height = dims[2])


# ==== Table 4 ====
# Dummy in parish
mod1 = feols(
  MB_ratio ~ Missionhouse | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data %>% drop_na(Missionhouse)
)

# Distance to mission house
mod2 = feols(
  MB_ratio ~ Dist_mission_std | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data %>% drop_na(Missionhouse)
)

# Number of mission houses within 25 km
mod3 = feols(
  MB_ratio ~ N_mission25_std | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data %>% drop_na(Missionhouse)
)

# Inverse sqrt distance decay
mod4 = feols(
  MB_ratio ~ IM_sqrt_std | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data %>% drop_na(Missionhouse)
)


sink("Tables/Table4.tex")
etable(
  mod1, mod2, mod3, mod4, 
  fitstat = c("n","r2", "wr2"),
  tex = TRUE
) %>% print()

etable(
  mod1, mod2, mod3, mod4, 
  fitstat = c("n","r2", "wr2"),
  tex = TRUE,
  vcov = conley(cutoff = 50)
) %>% print()
sink()

# ==== Table 5 ====
reg_data %>% group_by(GIS_ID) %>%
     summarise(x = mean(Sabbatarian)) %>%
     filter(x != 0) %>%
     filter(x != 1)

mod1 = feols(
  Sabbatarian ~ IM | Year + GIS_ID + soil_type_Year + NUTS2_year,
  data = reg_data
)

mod2 = feols(
  Sabbatarian ~ IM | Year + soil_type_Year + NUTS2_year,
  data = reg_data
)

mod3 = feols(
  log(Dividends) ~ IM | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data
)

mod4 = feols(
  log(Shareholders) ~ IM | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data
)

mod5 = feols(
  log(Collection_routes) ~ IM | Creamery_ID + Year + soil_type_Year + NUTS2_year,
  data = reg_data
)

mod6 = feols(
  More_than1 ~ IM | GIS_ID + Year + soil_type_Year + NUTS2_year,
  data = parish_level
)


sink("Tables/Table5.tex")
etable(
  list(mod1, mod2, mod3, mod4, mod5, mod6),
  tex = TRUE
) %>% print()

etable(
  list(mod1, mod2, mod3, mod4, mod5, mod6),
  tex = TRUE,
  vcov = conley(cutoff = 50)
) %>% print()
sink()
